R Markdown

Este documento tem como objetivo explorar e entender os dados relacionados à COVID-19.

# Load the WHO COVID-19 global data set. read.csv2() is used with the
# separator overridden to ",", so "," is still the decimal mark while parsing.
# Rows are ordered by report date and country, then the report date is
# converted to Date.
global_data <- read.csv2(
  "dataset/WHO-COVID-19-global-data.csv",
  header = TRUE,
  sep = ","
) %>%
  clean_names() %>%
  arrange(date_reported, country) %>%
  mutate(date_reported = as.Date(date_reported, format = "%Y-%m-%d"))

# First and last report dates present in the data
start_date <- min(global_data$date_reported)
last_date <- max(global_data$date_reported)

# Load the 2020 population table and rename three countries.
# NOTE(review): the new names presumably match the spelling used in the
# COVID-19 data this table is merged with by `country` -- confirm.
population <- read.csv2(
  "dataset/population_by_country_2020.csv",
  header = TRUE,
  sep = ","
) %>%
  clean_names() %>%
  rename(country = country_or_dependency) %>%
  mutate(
    country = as.character(country),
    country = case_when(
      country == "United States" ~ "United States of America",
      country == "United Kingdom" ~ "The United Kingdom",
      country == "Iran" ~ "Iran (Islamic Republic of)",
      TRUE ~ country
    )
  )

# Join the COVID-19 records with the population table. merge() keeps only the
# countries that appear in both tables.
global_data <- merge(global_data, population, by = "country")

global_data <- global_data %>%
  mutate(
    # On rows with a negative new_cases value, add its magnitude back to the
    # cumulative count.
    # NOTE(review): presumably compensates for retroactive corrections in the
    # source data -- confirm this adjustment is intended.
    cumulative_cases = if_else(
      new_cases < 0,
      -new_cases + cumulative_cases,
      cumulative_cases
    ),
    # Percentages rounded to four decimal places. round() replaces the former
    # number -> string -> number round trip through sprintf().
    rate_deaths_population =
      round(cumulative_deaths * 100 / population_2020, 4),
    rate_cases_population =
      round(cumulative_cases * 100 / population_2020, 4),
    rate_deaths_cases =
      round(cumulative_deaths * 100 / cumulative_cases, 4)
  ) %>%
  # Add a row for every calendar day between start_date and last_date for each
  # country; the other columns of the added rows are NA.
  group_by(country) %>%
  complete(date_reported = seq.Date(start_date, last_date, by = "day")) %>%
  ungroup()
#' Plot the time series of a metric for the top-ranked countries
#'
#' Ranks countries by `ycol` on the date `dt`, keeps the first `h`, and draws
#' one line per selected country over every date in `data`.
#'
#' @param data Data frame with `country` and `date_reported` columns plus the
#'   column named by `ycol`.
#' @param h Number of top-ranked countries to keep.
#' @param dt Value compared against `date_reported` to pick the ranking rows.
#' @param ycol Name of the column to rank by and to plot, as a string.
#' @param title Plot title.
#' @param xlab X-axis label.
#' @param ylab Y-axis label.
#' @return The result of `plotly::ggplotly()` applied to the line chart.
top_covid <- function(data = NULL, # data frame
                      h = 10, # head
                      dt, # last date
                      ycol, # column used in arrange
                      title, # title to plot
                      xlab, # xlab to plot
                      ylab # ylab to plot
                      ) {
  # Fail early with a readable message instead of a tidy-eval error later on.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  if (!is.character(ycol) || length(ycol) != 1L || is.na(ycol)) {
    stop("`ycol` must be a single column name.", call. = FALSE)
  }
  missing_cols <- setdiff(c("country", "date_reported", ycol), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "`data` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Build the column symbol once; it is reused for ranking and for the y axis.
  y_sym <- sym(ycol)

  # Countries with the highest values of `ycol` on the reference date
  top_rows <- data %>%
    filter(date_reported == dt) %>%
    arrange(desc(!!y_sym)) %>%
    head(h)
  top_countries <- top_rows[["country"]]

  # Full history of the selected countries, one line per country.
  # labs() is used so the `xlab`/`ylab` arguments are not passed to functions
  # of the same name.
  p <- data %>%
    filter(country %in% top_countries) %>%
    ggplot(aes(x = date_reported, y = !!y_sym, colour = country)) +
    geom_line() +
    labs(title = title, x = xlab, y = ylab)

  plotly::ggplotly(p)
}
# Top 10 countries by cumulative deaths
top_covid(global_data,
  h = 10, dt = last_date, ycol = "cumulative_deaths",
  title = "Top 10 Countries - Deaths Covid 19",
  xlab = "Date", ylab = "Cumulative Deaths"
)
# Top 10 countries by cumulative cases
top_covid(global_data,
  h = 10, dt = last_date, ycol = "cumulative_cases",
  title = "Top 10 Countries - Cases Covid 19",
  xlab = "Date", ylab = "Cumulative Cases"
)
# Top 10 countries by deaths as a share of total population
top_covid(global_data,
  h = 10, dt = last_date, ycol = "rate_deaths_population",
  title = "Top 10 Countries - Death rate by population - Covid 19",
  xlab = "Date", ylab = "Death Rate"
)
# Top 10 countries by cases as a share of total population
top_covid(global_data,
  h = 10, dt = last_date, ycol = "rate_cases_population",
  title = "Top 10 Countries - Infected rate by population - Covid 19",
  xlab = "Date", ylab = "Infected Rate"
)
# Top 10 countries by deaths as a share of cumulative cases
top_covid(global_data,
  h = 10, dt = last_date, ycol = "rate_deaths_cases",
  title = "Top 10 Countries - Deaths by Infected rate - Covid 19",
  xlab = "Date", ylab = "Death by Infected Rate"
)
# Keep only the numeric columns, then drop every row with a missing value
only_numeric_data <- global_data %>%
  select_if(is.numeric) %>%
  na.omit()
##                        new_cases cumulative_cases new_deaths cumulative_deaths
## new_cases                   1.00             0.78       0.78              0.67
## cumulative_cases            0.78             1.00       0.61              0.94
## new_deaths                  0.78             0.61       1.00              0.60
## cumulative_deaths           0.67             0.94       0.60              1.00
## population_2020             0.20             0.18       0.14              0.14
## net_change                  0.18             0.13       0.11              0.08
## density_p_km               -0.02            -0.02      -0.02             -0.02
## land_area_km                0.41             0.37       0.32              0.31
## rate_deaths_population      0.14             0.26       0.19              0.40
## rate_cases_population       0.14             0.19       0.10              0.20
## rate_deaths_cases           0.06             0.10       0.13              0.20
##                        population_2020 net_change density_p_km land_area_km
## new_cases                         0.20       0.18        -0.02         0.41
## cumulative_cases                  0.18       0.13        -0.02         0.37
## new_deaths                        0.14       0.11        -0.02         0.32
## cumulative_deaths                 0.14       0.08        -0.02         0.31
## population_2020                   1.00       0.86        -0.02         0.55
## net_change                        0.86       1.00        -0.03         0.39
## density_p_km                     -0.02      -0.03         1.00        -0.07
## land_area_km                      0.55       0.39        -0.07         1.00
## rate_deaths_population           -0.03      -0.06        -0.01        -0.01
## rate_cases_population            -0.06      -0.09         0.09        -0.05
## rate_deaths_cases                 0.02       0.01        -0.06         0.02
##                        rate_deaths_population rate_cases_population
## new_cases                                0.14                  0.14
## cumulative_cases                         0.26                  0.19
## new_deaths                               0.19                  0.10
## cumulative_deaths                        0.40                  0.20
## population_2020                         -0.03                 -0.06
## net_change                              -0.06                 -0.09
## density_p_km                            -0.01                  0.09
## land_area_km                            -0.01                 -0.05
## rate_deaths_population                   1.00                  0.63
## rate_cases_population                    0.63                  1.00
## rate_deaths_cases                        0.32                  0.06
##                        rate_deaths_cases
## new_cases                           0.06
## cumulative_cases                    0.10
## new_deaths                          0.13
## cumulative_deaths                   0.20
## population_2020                     0.02
## net_change                          0.01
## density_p_km                       -0.06
## land_area_km                        0.02
## rate_deaths_population              0.32
## rate_cases_population               0.06
## rate_deaths_cases                   1.00

# Scatter plot of new cases (y) against new deaths (x) for Brazil, with points
# coloured by new cases and a loess smoother on top.
graf_cases_deaths <- ggplot(
  filter(global_data, country == "Brazil"),
  aes(y = new_cases, x = new_deaths)
) +
  geom_point(aes(colour = new_cases)) +
  geom_smooth(method = "loess") +
  labs(
    subtitle = "Novos Casos vs Novas Mortes - Brasil",
    x = "Novas mortes",
    y = "Novos casos",
    color = "Casos vs Mortes"
  )

# Render the ggplot object as an interactive plotly widget
plotly::ggplotly(graf_cases_deaths)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 46 rows containing non-finite values (stat_smooth).
# Linear regression of cumulative cases on cumulative deaths, fitted on
# global_data with all countries pooled together
lm(cumulative_cases ~ cumulative_deaths, data = global_data)
## 
## Call:
## lm(formula = cumulative_cases ~ cumulative_deaths, data = global_data)
## 
## Coefficients:
##       (Intercept)  cumulative_deaths  
##           1102.00              14.43